Name: Thomas Lin
Andrew: ttlin
Assignment: 2-Gesture Recognizer
Parts 1- Record Your own gesture set.
#A lot of the following code is from Jon Froehlich's Gesture Logger Jupyter Notebook.
#Some of it has been edited to better represent this assignment.
#I also used a similar project by dan bunnell for inspiration and to gain intuition
#on how some functions should work
import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
#files are made into SensorData files
class SensorData:
    """Holds one sensor's log for a single gesture trial (x/y/z plus derived magnitude)."""

    def __init__(self, sensorType, currentTimeMs, sensorTimestampNano, x, y, z):
        """Convert raw (string) CSV column arrays into numeric signal arrays.

        sensorType: e.g. "Accelerometer" or "Gyroscope".
        currentTimeMs / sensorTimestampNano: per-event timestamps (cast to int64).
        x, y, z: per-axis readings (cast to float); magnitude is computed from them.
        """
        self.sensorType = sensorType
        self.currentTimeMs = currentTimeMs.astype(np.int64)
        self.sensorTimestampNano = sensorTimestampNano.astype(np.int64)
        self.x = x.astype(float)
        self.y = y.astype(float)
        self.z = z.astype(float)
        # Euclidean magnitude of the 3-axis signal, one value per sample.
        self.mag = np.sqrt(self.x**2 + self.y**2 + self.z**2)

    def get_data(self):
        """Return the four signals keyed by name."""
        return {"x":self.x, "y":self.y, "z":self.z, "mag":self.mag}

    def pad_with_zeros(self, newArrayLength):
        """Right-pad all four signals with zeros up to newArrayLength.

        Stores the pre-padding length in self.signalLengthBeforePadding.
        Raises ValueError if newArrayLength is shorter than the current length.
        """
        self.signalLengthBeforePadding = len(self.x)
        arrayLengthDiff = newArrayLength - len(self.x)
        if arrayLengthDiff < 0:
            # BUG FIX: the original message was missing the closing quote after
            # the second placeholder ("'{}" instead of "'{}'").
            raise ValueError("New array length '{}' must be larger than current array length '{}'".
                             format(newArrayLength, len(self.x)))
        self.x = np.pad(self.x, (0, arrayLengthDiff), 'constant', constant_values=0)
        self.y = np.pad(self.y, (0, arrayLengthDiff), 'constant', constant_values=0)
        self.z = np.pad(self.z, (0, arrayLengthDiff), 'constant', constant_values=0)
        self.mag = np.pad(self.mag, (0, arrayLengthDiff), 'constant', constant_values=0)
#A gesture's recording is called a trial
class Trial:
    """One recorded gesture trial: paired accelerometer and gyroscope CSV logs."""

    def __init__(self, gestureName, endTimeMs, trialNum, accelLogFilenameWithPath, gyroLogFilenameWithPath):
        """Load the accelerometer and gyroscope logs for a single trial.

        Each CSV has one header row; columns after the first are the five
        arrays SensorData expects (timestamps and x/y/z readings).
        """
        self.gestureName = gestureName
        self.trialNum = trialNum
        self.endTimeMs = endTimeMs
        self.accelLogFilenameWithPath = accelLogFilenameWithPath
        self.gyroLogFilenameWithPath = gyroLogFilenameWithPath
        # Read everything as strings; SensorData casts columns to numeric types.
        parsedAccelLogData = np.genfromtxt(accelLogFilenameWithPath, delimiter=',',
                                           dtype=str, encoding=None, skip_header=1, unpack=True)
        # [1:] skips the first CSV column — presumably a row index; verify against the logger format.
        self.accel = SensorData("Accelerometer", *parsedAccelLogData[1:])
        parsedGyroLogData = np.genfromtxt(gyroLogFilenameWithPath, delimiter=',',
                                          dtype=str, encoding=None, skip_header=1, unpack=True)
        self.gyro = SensorData("Gyroscope", *parsedGyroLogData[1:])

    def getEndTimeMsAsString(self):
        """Return the trial end time as 'YYYY-MM-DD HH:MM:SS' in local time."""
        # BUG FIX: 'time' was never imported anywhere in the file, so this
        # method raised NameError when called; import it locally here.
        import time
        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(self.endTimeMs / 1000))
from os import listdir
import ntpath
import os
#file parsing functions
def find_csv_filenames( path_to_dir, suffix=".csv" ):
    """Return the names of files in path_to_dir whose names end with suffix."""
    return [name for name in listdir(path_to_dir) if name.endswith(suffix)]
def parse_and_create_gesture_trials( path_to_dir ):
    """Scan path_to_dir for gesture CSV logs and build Trial objects.

    Filenames are expected to look like '<gesture>_<sensor>_<endTimeMs>.csv'
    (e.g. 'Swipe_Accelerometer_1234.csv'). Returns a dict mapping gesture
    name -> list of Trial objects ordered by end time, with every trial's
    sensor arrays zero-padded to the length of the longest trial found.
    """
    csvFilenames = find_csv_filenames(path_to_dir)
    print("Found {} csv files in {}".format(len(csvFilenames), path_to_dir))
    mapGestureNameToTrialList = dict()
    # Nested index: gesture name -> end time (ms) -> sensor name -> csv filename.
    mapGestureNameToMapEndTimeMsToMapSensorToFile = dict()
    for csvFilename in csvFilenames:
        # Split '<gesture>_<sensor>_<timeMs>' into its three parts.
        filenameNoExt = os.path.splitext(csvFilename)[0];
        filenameParts = filenameNoExt.split("_")
        gestureName = filenameParts[0]
        sensorName = filenameParts[1]
        timeMs = int(filenameParts[2])
        if gestureName not in mapGestureNameToMapEndTimeMsToMapSensorToFile:
            mapGestureNameToMapEndTimeMsToMapSensorToFile[gestureName] = dict()
        if timeMs not in mapGestureNameToMapEndTimeMsToMapSensorToFile[gestureName]:
            mapGestureNameToMapEndTimeMsToMapSensorToFile[gestureName][timeMs] = dict()
        mapGestureNameToMapEndTimeMsToMapSensorToFile[gestureName][timeMs][sensorName] = csvFilename
    print("Found {} gestures".format(len(mapGestureNameToMapEndTimeMsToMapSensorToFile)))
    # Track the longest accel signal so all trials can be padded to one length.
    maxArrayLength = -1
    trialWithMostSensorEvents = None
    for gestureName, mapEndTimeMsToMapSensorToFile in mapGestureNameToMapEndTimeMsToMapSensorToFile.items():
        gestureTrialNum = 0
        mapGestureNameToTrialList[gestureName] = list()
        # Iterate in chronological order so trial numbers are stable across runs.
        for endTimeMs in sorted(mapEndTimeMsToMapSensorToFile.keys()):
            mapSensorToFile = mapEndTimeMsToMapSensorToFile[endTimeMs]
            accelFilenameWithPath = os.path.join(path_to_dir, mapSensorToFile["Accelerometer"])
            gyroFilenameWithPath = os.path.join(path_to_dir, mapSensorToFile["Gyroscope"])
            gestureTrial = Trial(gestureName, endTimeMs, gestureTrialNum, accelFilenameWithPath, gyroFilenameWithPath)
            mapGestureNameToTrialList[gestureName].append(gestureTrial)
            if maxArrayLength < len(gestureTrial.accel.x):
                maxArrayLength = len(gestureTrial.accel.x)
                trialWithMostSensorEvents = gestureTrial
            gestureTrialNum = gestureTrialNum + 1
        print("Found {} trials for '{}'".format(len(mapGestureNameToTrialList[gestureName]), gestureName))
    print("Max trial length across all gesture is '{}' Trial {} with {} sensor events. Resizing all arrays to match".
          format(trialWithMostSensorEvents.gestureName, trialWithMostSensorEvents.trialNum, maxArrayLength))
    # Zero-pad both sensors of every trial so all signals share one length.
    for gestureName, trialList in mapGestureNameToTrialList.items():
        for trial in trialList:
            trial.accel.pad_with_zeros(maxArrayLength)
            trial.gyro.pad_with_zeros(maxArrayLength)
    return mapGestureNameToTrialList
def path_leaf(path):
    """Return the final path component, tolerating a trailing separator."""
    head, tail = ntpath.split(path)
    if tail:
        return tail
    # path ended with a separator; take the last component of what's left.
    return ntpath.basename(head)
def extract_gesture_name( filename ):
    """Return the gesture name: everything before the first '_' in filename.

    Raises ValueError (from str.index) when filename has no underscore.
    """
    underscore_pos = filename.index('_')
    return filename[:underscore_pos]
def get_min_num_of_trials( mapGestureToTrials ):
    """Return the smallest trial count across all gestures (-1 if the map is empty)."""
    trial_counts = (len(trials) for trials in mapGestureToTrials.values())
    return min(trial_counts, default=-1)
def get_total_num_of_trials (mapGestureToTrials):
    """Return the total number of trials summed over every gesture."""
    return sum(len(trials) for trials in mapGestureToTrials.values())
# Load both gesture sets (Jon's and mine) from their log directories.
jonData = './JonGestureLogs'
myData = './MyGestureLogs'
jonGestures = parse_and_create_gesture_trials(jonData)
myGestures = parse_and_create_gesture_trials(myData)
# All trials were padded to a common length, so read it from any one trial.
signalLength = len(list(jonGestures.values())[0][0].accel.x)
Visualize both gesture sets (my recordings of the gestures and your recordings of the gestures) in Jupyter Notebook. Your visualizations should include anything that helps you analyze the signals and aid your signal processing and classification approaches. At the very least, you should visualize the raw x, y, and z accelerometer and gyroscope signals as line graphs and visualize some sort of frequency analysis (e.g., spectral density plot, spectrogram). Please appropriately label axes, titles, and include legends.
#This plots the data of the trials
#These plot line graphs of Jon's X, Y, Z and Mag
# One row of subplots per gesture; columns are Accel X, Y, Z, and magnitude.
fig, axes = plt.subplots(len(jonGestures), 4, figsize=(20, 3 * len(jonGestures)))
fig.subplots_adjust(hspace=0.5)
# A parallel grid of power-spectral-density plots for frequency analysis.
psd_fig, psd_axes = plt.subplots(len(jonGestures), 4, figsize=(20, 3 * len(jonGestures)))
psd_fig.subplots_adjust(hspace=0.5)
index = 0
gestureNamesSorted = sorted(jonGestures.keys())
for gestureName in gestureNamesSorted:
    gestureTrials = jonGestures[gestureName]
    trialNum = 1
    for trial in gestureTrials:
        # Overlay every trial on the same axes (alpha keeps overlaps readable).
        axes[index][0].plot(trial.accel.x, alpha=0.7, label="Trial {}".format(trialNum))
        axes[index][1].plot(trial.accel.y, alpha=0.7, label="Trial {}".format(trialNum))
        axes[index][2].plot(trial.accel.z, alpha=0.7, label="Trial {}".format(trialNum))
        axes[index][3].plot(trial.accel.mag, alpha=0.7, label="Trial {}".format(trialNum))
        # NOTE(review): Fs=40 assumes a ~40 Hz sampling rate — confirm against the logger.
        psd_axes[index][0].psd(trial.accel.x, NFFT=signalLength, Fs=40, alpha=0.7, label="Trial {}".format(trialNum))
        psd_axes[index][1].psd(trial.accel.y, NFFT=signalLength, Fs=40, alpha=0.7, label="Trial {}".format(trialNum))
        psd_axes[index][2].psd(trial.accel.z, NFFT=signalLength, Fs=40, alpha=0.7, label="Trial {}".format(trialNum))
        psd_axes[index][3].psd(trial.accel.mag, NFFT=signalLength, Fs=40, alpha=0.7, label="Trial {}".format(trialNum))
        trialNum = trialNum + 1
    # Title + legend for each subplot in this gesture's row.
    axes[index][0].set_title("Jon's " + gestureName + " (Accel X)")
    axes[index][0].legend()
    axes[index][1].set_title("Jon's " + gestureName + " (Accel Y)")
    axes[index][1].legend()
    axes[index][2].set_title("Jon's " + gestureName + " (Accel Z)")
    axes[index][2].legend()
    axes[index][3].set_title("Jon's " + gestureName + " (Accel Mag)")
    axes[index][3].legend()
    psd_axes[index][0].set_title("Jon's " + gestureName + "(Accel X SD)")
    psd_axes[index][0].legend()
    psd_axes[index][1].set_title("Jon's " + gestureName + "(Accel Y SD)")
    psd_axes[index][1].legend()
    psd_axes[index][2].set_title("Jon's " + gestureName + "(Accel Z SD)")
    psd_axes[index][2].legend()
    psd_axes[index][3].set_title("Jon's " + gestureName + "(Accel Mag SD)")
    psd_axes[index][3].legend()
    index = index + 1
#These plot line graphs of my X, Y, Z and Mag
# Same layout as the previous cell, but for my (Thomas's) gesture set.
fig, axes = plt.subplots(len(myGestures), 4, figsize=(20, 3 * len(myGestures)))
fig.subplots_adjust(hspace=0.5)
psd_fig, psd_axes = plt.subplots(len(myGestures), 4, figsize=(20, 3 * len(myGestures)))
psd_fig.subplots_adjust(hspace=0.5)
index = 0
gestureNamesSorted = sorted(myGestures.keys())
for gestureName in gestureNamesSorted:
    gestureTrials = myGestures[gestureName]
    trialNum = 1
    for trial in gestureTrials:
        # Overlay every trial on the same axes (alpha keeps overlaps readable).
        axes[index][0].plot(trial.accel.x, alpha=0.7, label="Trial {}".format(trialNum))
        axes[index][1].plot(trial.accel.y, alpha=0.7, label="Trial {}".format(trialNum))
        axes[index][2].plot(trial.accel.z, alpha=0.7, label="Trial {}".format(trialNum))
        axes[index][3].plot(trial.accel.mag, alpha=0.7, label="Trial {}".format(trialNum))
        # NOTE(review): Fs=40 assumes a ~40 Hz sampling rate — confirm against the logger.
        psd_axes[index][0].psd(trial.accel.x, NFFT=signalLength, Fs=40, alpha=0.7, label="Trial {}".format(trialNum))
        psd_axes[index][1].psd(trial.accel.y, NFFT=signalLength, Fs=40, alpha=0.7, label="Trial {}".format(trialNum))
        psd_axes[index][2].psd(trial.accel.z, NFFT=signalLength, Fs=40, alpha=0.7, label="Trial {}".format(trialNum))
        psd_axes[index][3].psd(trial.accel.mag, NFFT=signalLength, Fs=40, alpha=0.7, label="Trial {}".format(trialNum))
        trialNum = trialNum + 1
    # Title + legend for each subplot in this gesture's row.
    axes[index][0].set_title("Thomas's " + gestureName + " (Accel X)")
    axes[index][0].legend()
    axes[index][1].set_title("Thomas's " + gestureName + " (Accel Y)")
    axes[index][1].legend()
    axes[index][2].set_title("Thomas's " + gestureName + " (Accel Z)")
    axes[index][2].legend()
    axes[index][3].set_title("Thomas's " + gestureName + " (Accel Mag)")
    axes[index][3].legend()
    psd_axes[index][0].set_title("Thomas's "+ gestureName + "(Accel X SD)")
    psd_axes[index][0].legend()
    psd_axes[index][1].set_title("Thomas's " + gestureName + "(Accel Y SD)")
    psd_axes[index][1].legend()
    psd_axes[index][2].set_title("Thomas's " + gestureName + "(Accel Z SD)")
    psd_axes[index][2].legend()
    psd_axes[index][3].set_title("Thomas's " + gestureName + "(Accel Mag SD)")
    psd_axes[index][3].legend()
    index = index + 1
Design and implement a shape-matching gesture recognition approach (e.g., using DTW). What transformations of the signal are necessary here (e.g., smoothing, detrending, etc.)?
Answer: We want to make our signals all follow the same basic shape. We first align them using cross-correlation, and do this to all dimensions. We then average out the values of each of the signals in every dimension, and this gives us our average shapes.
#We would want to first make our shapes uniform for each gesture by:
#aligning the inputs in the axes by taking the average of all of them
from fastdtw import fastdtw
from scipy.spatial import distance
from scipy.spatial.distance import euclidean
from sklearn.metrics import confusion_matrix
#this is how we define our tests
class testSet:
    """Bundles one train/test split: a list of held-out trials and a
    gesture-name -> training-trials mapping."""
    def __init__(self, testing, training):
        self.testing = testing
        self.training = training
#This circularly shifts x so its cross-correlation with y is maximized, aligning the two signals
def alignTwo( x,y ):
    """Circularly shift x so it best lines up with y.

    The shift is chosen by the peak of the full cross-correlation of x and y.
    """
    xcorr = signal.correlate(x, y)
    best_lag = np.argmax(xcorr)
    return np.roll(x, len(x) - best_lag)
#this aligns each gesture to a shape and returns a dictionary of the average
#shapes
def alignGestures(gestureSet):
    """Build an average 'template' shape per gesture.

    Each trial after the first is circularly aligned (per axis) against the
    previous trial's raw signal; the aligned signals are then averaged.
    Returns {gesture: {"x": avg_x, "y": avg_y, "z": avg_z}}.
    """
    gestureShapes = dict()
    for gesture in sorted(gestureSet.keys()):
        trials = gestureSet[gesture]
        # The first trial seeds the alignment chain unchanged.
        aligned_x = [np.asarray(trials[0].accel.x, dtype=float)]
        aligned_y = [np.asarray(trials[0].accel.y, dtype=float)]
        aligned_z = [np.asarray(trials[0].accel.z, dtype=float)]
        # Align every subsequent trial against the previous *raw* trial, axis by axis.
        for prev, cur in zip(trials, trials[1:]):
            aligned_x.append(alignTwo(cur.accel.x, prev.accel.x))
            aligned_y.append(alignTwo(cur.accel.y, prev.accel.y))
            aligned_z.append(alignTwo(cur.accel.z, prev.accel.z))
        # Average across trials to get this gesture's per-axis template.
        gestureShapes[gesture] = {
            "x" : np.mean(aligned_x, axis = 0),
            "y" : np.mean(aligned_y, axis = 0),
            "z" : np.mean(aligned_z, axis = 0)
        }
    return gestureShapes
#this picks the best gesture for the trial
def checkTrial (trial, gestureShapes):
    """Return the name of the gesture whose average shape is nearest to trial.

    Distance is the sum of per-axis Euclidean distances after aligning the
    trial's signal to each gesture's template. Ties keep the earlier gesture.
    """
    distValues = dict()
    best_dist = -1
    best_gesture = ""
    for gesture, shape in gestureShapes.items():
        total = 0.0
        axis_dists = dict()
        # Align the trial to this gesture's template, then measure the gap,
        # one axis at a time (x, y, z — order matters for float summation).
        for axis in ("x", "y", "z"):
            template = shape[axis]
            aligned = alignTwo(getattr(trial.accel, axis), template)
            d = distance.euclidean(aligned, template)
            axis_dists[axis] = d
            total += d
        axis_dists["total"] = total
        distValues[gesture] = axis_dists
        # Track the closest template seen so far.
        if best_dist == -1 or total < best_dist:
            best_dist = total
            best_gesture = gesture
    return best_gesture
#this runs the code
def runShapeMatch(tests):
    """Evaluate the shape-matching recognizer over a list of testSet splits.

    Prints per-gesture accuracy, overall accuracy, and a confusion matrix.
    """
    total_tests = 0
    total_pass = 0
    # gesture -> [num correct, num wrong, num total]
    result = dict()
    tested = list()      # true labels, in test order
    predicted = list()   # predicted labels, in test order
    labels = sorted(tests[0].training.keys())
    for test in tests:
        # Build average template shapes from this split's training trials.
        gestureShapes = alignGestures(test.training)
        for trial in test.testing:
            estGesture = checkTrial(trial, gestureShapes)
            if trial.gestureName not in result:
                result[trial.gestureName] = [0, 0, 0]
            total_tests += 1
            result[trial.gestureName][2] += 1
            if trial.gestureName == estGesture:
                result[trial.gestureName][0] += 1
                total_pass += 1
            else:
                result[trial.gestureName][1] += 1
            tested.append(trial.gestureName)
            predicted.append(estGesture)
    for gesture in labels:
        print(gesture + " "+ str(result[gesture][0]) + "/" + str(result[gesture][2]) + " | "+ str(result[gesture][0]/result[gesture][2] *100)+"%")
    print("Final Count | pass: "+ str(total_pass) + " | total: "+ str(total_tests) +" | "+ str((total_pass/total_tests)*100) + "%")
    # BUG FIX: sklearn's confusion_matrix takes 'labels' as keyword-only in
    # modern versions; passing it positionally raises a TypeError.
    confusion = confusion_matrix(tested, predicted, labels=labels)
    print("Confusion matrix for data")
    print(confusion)
# Build 5 train/test splits from Jon's data: split t holds out trial t of
# every gesture for testing and trains on the remaining trials.
jonTest = list()
gestureSorted = sorted(jonGestures.keys())
for t in range(5):
    testing = list()
    training = dict()
    for i in range (len(gestureSorted)):
        gesture = gestureSorted[i]
        trials = jonGestures[gesture]
        testing.append(trials[t])
        # np.delete removes the held-out trial (index t) from the training set.
        training[gesture] = np.delete(trials, t, 0)
    jonTest.append(testSet(testing,training))
# Same construction for my (Thomas's) data.
ThomasTest = list()
gestureSorted = sorted(myGestures.keys())
for t in range(5):
    testing = list()
    training = dict()
    for i in range (len(gestureSorted)):
        gesture = gestureSorted[i]
        trials = myGestures[gesture]
        testing.append(trials[t])
        training[gesture] = np.delete(trials, t, 0)
    ThomasTest.append(testSet(testing,training))
# Run the shape-matching recognizer on both users' splits.
print("TEST OF JON'S DATA")
print("------------------")
runShapeMatch(jonTest)
print("=========================")
print("")
print("=========================")
print("TEST OF THOMAS'S DATA")
print("---------------------")
runShapeMatch(ThomasTest)
Design and implement a model-based gesture recognition approach (e.g., using an SVM or another model of your choosing). What features are most discriminable? How did you encode those features in your model?
Answer: I believe the most important features are the x, y, and z signals. I accomplished this by training the model on features derived from the x, y, and z signals and got the following results.
# The following implements one way of k-folds
from random import randint
fixedFoldsToMapGestureToTrial = None
def generate_kfolds(mapGestureToTrials, numFolds=5, createNewKFold=False):
    """Randomly distribute each gesture's trials across numFolds folds.

    Each fold maps gesture name -> exactly one trial. The first result is
    cached at module level and returned on later calls unless
    createNewKFold is True. Requires every gesture to have exactly
    numFolds trials; raises ValueError otherwise.
    """
    global fixedFoldsToMapGestureToTrial
    # Reuse the cached folds unless the caller explicitly asks for new ones.
    if createNewKFold is False and fixedFoldsToMapGestureToTrial is not None:
        return fixedFoldsToMapGestureToTrial
    # Quick check to make sure that there are numFolds of gesture trials for each gesture
    for gestureName, trials in mapGestureToTrials.items():
        if numFolds != len(trials):
            raise ValueError("For the purposes of this assignment, the number of folds={} must equal the number of trials for each gesture. Gesture '{}' has {} trials"
                             .format(numFolds, gestureName, len(trials)))
    # Work on copies so the caller's trial lists are not consumed.
    remainingTrials = dict()
    for gestureName, trials in mapGestureToTrials.items():
        remainingTrials[gestureName] = list(trials)
    gestureNames = list(mapGestureToTrials.keys())
    # Fill each fold by drawing one remaining trial at random per gesture.
    foldToMapGestureToTrial = list()
    for _ in range(0, numFolds):
        curFoldMapGestureToTrial = dict()
        for curGestureName in gestureNames:
            pool = remainingTrials[curGestureName]
            pickIndex = randint(0, len(pool) - 1) if pool else 0
            curFoldMapGestureToTrial[curGestureName] = pool[pickIndex]
            del pool[pickIndex]
        foldToMapGestureToTrial.append(curFoldMapGestureToTrial)
    if fixedFoldsToMapGestureToTrial is None:
        fixedFoldsToMapGestureToTrial = foldToMapGestureToTrial
    return foldToMapGestureToTrial
from sklearn import svm
def runSVM(tests):
    """Evaluate an SVM classifier over a list of testSet splits.

    Each trial is encoded as a 4-feature vector: the maxima of the accel
    x, y, z, and magnitude signals. Prints per-gesture accuracy, overall
    accuracy, and a confusion matrix.
    """
    total_tests = 0
    total_pass = 0
    # gesture -> [num correct, num wrong, num total]
    result = dict()
    tested = list()      # true labels, in test order
    predicted = list()   # predicted labels, in test order
    for test in tests:
        count = sum(len(trials) for trials in test.training.values())
        trainingData = np.empty((count, 4))
        classLabels = np.empty((count,), dtype=np.dtype('U25'))
        i = 0
        for gesture in test.training:
            for trial in test.training[gesture]:
                # Feature vector: per-axis maxima plus magnitude maximum.
                vector = [trial.accel.x.max(), trial.accel.y.max(), trial.accel.z.max(), trial.accel.mag.max()]
                trainingData[i] = vector
                classLabels[i] = gesture
                i += 1
        classifier = svm.SVC()
        classifier.fit(trainingData, classLabels)
        # Encode the held-out trials with the same features.
        test_vector = np.empty((len(test.testing), 4))
        j = 0
        for trial in test.testing:
            test_vector[j] = [trial.accel.x.max(), trial.accel.y.max(), trial.accel.z.max(), trial.accel.mag.max()]
            j += 1
        predictions = classifier.predict(test_vector)
        # Score each prediction against the true gesture name.
        t = 0
        for trial in test.testing:
            if trial.gestureName not in result:
                result[trial.gestureName] = [0, 0, 0]
            total_tests += 1
            result[trial.gestureName][2] += 1
            if trial.gestureName == predictions[t]:
                result[trial.gestureName][0] += 1
                total_pass += 1
            else:
                result[trial.gestureName][1] += 1
            tested.append(trial.gestureName)
            predicted.append(predictions[t])
            t += 1
    labels = sorted(tests[0].training.keys())
    for gesture in labels:
        print(gesture + " "+ str(result[gesture][0]) + "/" + str(result[gesture][2]) + " | "+ str(result[gesture][0]/result[gesture][2] *100)+"%")
    print("Final Count | pass: "+ str(total_pass) + " | total: "+ str(total_tests) +" | "+ str((total_pass/total_tests)*100) + "%")
    # BUG FIX: pass 'labels' as a keyword — it is keyword-only in modern sklearn.
    confusion = confusion_matrix(tested, predicted, labels=labels)
    print("Confusion matrix for data")
    print(confusion)
Evaluate your two approaches using k-fold cross validation. For each user (my gesture set and your gesture set), randomly split the data into k-folds (k=5). Use four folds for training and one for testing and repeat this five times (with a different fold reserved for testing each time). You do not need to examine cross-user performance (e.g., training on my gesture set and testing on your gesture set); however, see the Bonus section. For performance metrics, your Notebook should print out the following for both the shape-matching and model-based approaches: (i) overall accuracy; (ii) per-gesture accuracy; (iii) and a confusion matrix.
# Build k=5 folds from Jon's data; a random per-gesture offset rotates which
# trial index is held out for testing in each fold.
jonTestFolded = list()
gestureSorted = sorted(jonGestures.keys())
k = 5
randomOffset = np.random.randint(5,size=len(jonGestures))
for t in range(k):
    testing = list()
    training = dict()
    for i in range (len(gestureSorted)):
        gesture = gestureSorted[i]
        trials = jonGestures[gesture]
        offset = randomOffset[i]
        test_index = (offset+t) % 5
        testing.append(trials[test_index])
        # BUG FIX: remove the *tested* trial from the training set. The
        # original deleted index t, which left trial test_index in training
        # whenever offset != 0 (train/test data leakage).
        training[gesture] = np.delete(trials, test_index, 0)
    jonTestFolded.append(testSet(testing,training))
# Build k=5 folds from my (Thomas's) data, same scheme as for Jon's data.
ThomasTestFolded = list()
gestureSorted = sorted(myGestures.keys())
k = 5
randomOffset = np.random.randint(5,size=len(myGestures))
for t in range(k):
    testing = list()
    training = dict()
    for i in range (len(gestureSorted)):
        gesture = gestureSorted[i]
        trials = myGestures[gesture]
        offset = randomOffset[i]
        test_index = (offset+t) % 5
        testing.append(trials[test_index])
        # BUG FIX: remove the *tested* trial from the training set. The
        # original deleted index t, which left trial test_index in training
        # whenever offset != 0 (train/test data leakage).
        training[gesture] = np.delete(trials, test_index, 0)
    ThomasTestFolded.append(testSet(testing,training))
# Evaluate the shape-matching recognizer on the k-folded splits.
print("TEST OF JON'S FOLDED DATA")
print("-------------------------")
runShapeMatch(jonTestFolded)
print("==============================")
print("")
print("==============================")
print("TEST OF THOMAS'S FOLDED DATA")
print("----------------------------")
runShapeMatch(ThomasTestFolded)
# Evaluate the model-based (SVM) recognizer on the same folds.
print("TEST OF JON'S FOLDED DATA")
print("-------------------------")
runSVM(jonTestFolded)
print("==============================")
print("")
print("==============================")
print("TEST OF THOMAS'S FOLDED DATA")
print("----------------------------")
runSVM(ThomasTestFolded)